require(ClusterR)
## Loading required package: ClusterR
## Loading required package: gtools
require(scales)
## Loading required package: scales
require(CytoML)
## Loading required package: CytoML
library(reshape2)
# https://github.com/RGLab/flowWorkspace/issues/231
popsOfInterest = c("effector memory", "central memory", "naive", "effector")
gateKmeansWsp = function(gs,
fcsFile,
outputDir,
min = -20,
max = 250,
k = 4,
num_init = 150,
max_iters = 5000,
nodesOfInterest = c("Helper Tcells-CD4+", "cytotoxic Tcells-CD8+"),
markersToCluster = c("CCR7", "CD45RA", "CD28"),
addComp = FALSE) {
dir.create(outputDir)
outputRoot = paste0(outputDir, fcsFile)
clustFile = paste0(outputRoot, ".clusterData")
combo = data.frame(ht = getIndiceMat(gs, nodesOfInterest[1])[, 1],
ct = getIndiceMat(gs, nodesOfInterest[1])[, 1])
combo$MDEF = combo$ct | combo$ht
fullDef=data.frame(MDEF=combo$MDEF)
allNodes=getNodes(gs)
for(node in allNodes){
tmp = getIndiceMat(gs, node)[, 1]
fullDef=cbind(fullDef,tmp)
}
colnames(fullDef)=allNodes
subto=5000
fullDef=fullDef[1:min(subto,length(fullDef[,1])),]
gh <- gs[[1]]
subdata = getData(gh)[combo$MDEF, ]
channels = colnames(subdata)[grep("Comp", colnames(subdata))]
# markersToCluster= markernames(subdata)[grep("Comp", colnames(subdata))]
if (!file.exists(clustFile)) {
# getMar
# for (marker in markersToCluster) {
# if (addComp) {
# channels = c(channels, c(paste0(
# "Comp-",
# getChannelMarker(frame, marker)$name
# )))
# } else{
# channels = c(channels, c(paste0(
# getChannelMarker(frame, marker)$name
# )))
# }
# }
# print(getData(gh))
t = as.data.frame(subdata@exprs)[, channels]
for (channel in channels) {
t[, channel] = squish(t[, channel], range = c(min, max))
}
clust = center_scale(t[, channels])
clust =clust[1:min(subto,length(clust[,1])),]
# colnames(clust) = markersToCluster
print(paste0("clustering sample ", fcsFile))
km_rc = KMeans_rcpp(
clust,
clusters = k,
num_init = num_init,
max_iters = max_iters,
tol = 1e-06,
initializer = 'optimal_init',
# threads = 4,
verbose = F,
seed = 42
)
clust = as.data.frame(clust)
colnames(clust)=channels
clust$KMEANS_CLUSTER = km_rc$clusters
print(paste0("running phenograph for sample ", fcsFile))
clusterPhenograph = cytof_cluster(xdata = clust, method = "Rphenograph")
clust$PHENOGRAPH = clusterPhenograph
print(paste0("running tsne for sample ", fcsFile))
tsne <- cytof_dimReduction(data = clust,
method = "tsne",
out_dim = 2)
cluster_DensVM <- cytof_cluster(xdata = clust,
ydata = tsne, method = "DensVM")
clust = cbind(clust,tsne)
clust$CLUSTER_DENSVM=cluster_DensVM
save(clust, file = clustFile)
}else{
load(clustFile)
}
clust=cbind(clust,fullDef)
optKN = 16
o3_t = Optimal_Clusters_KMeans(
clust[,channels],
max_clusters = optKN,
plot_clusters = TRUE,
criterion = 'distortion_fK',
tol = 1e-06,
seed = 42
)
tsalpha = .5
tsSize =1
methods = c("KMEANS_CLUSTER", "PHENOGRAPH","CLUSTER_DENSVM")
for (method in methods) {
clust$CURRENT_COLOR=clust[,method]
pb2 = ggplot(clust) +
geom_point(
alpha = tsalpha,
aes(
x = tsne_1,
y = tsne_2,
colour = CURRENT_COLOR
),
size = tsSize
) +
xlab(paste0("tsne1")) +
ylab(paste0("tsne2")) + ggtitle(paste0(method, "\n", samp)) +
theme(legend.position = "bottom")
print(pb2)
# + t2
}
}
require(flowCore)
## Loading required package: flowCore
require(cytofkit)
## Loading required package: cytofkit
## Loading required package: ggplot2
## Loading required package: plyr
library(ggcyto)
## Loading required package: ncdfFlow
## Loading required package: RcppArmadillo
## Loading required package: BH
## Loading required package: flowWorkspace
library(ClusterR)
library(gridExtra)
library(grid)
library(scales)
library(cluster)
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
##
## rename, round_any
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
library(openCyto)
alpha = 0.05
reCluster = TRUE
runPhenograph = FALSE
inputDir = "/Volumes/Beta2/flow/testNovels/"
wsDir = "/Volumes/Beta2/flow/wsp/"
wsps <-
list.files(wsDir,
pattern = "wsp$",
full = TRUE,
recursive = TRUE)
print(paste0("Found ", length(wsps), " wsps"))
## [1] "Found 33 wsps"
#
# for (file in wsps) {
# ws <- openWorkspace(file)
# s = getSamples(ws)
# print(length(s$name))
# s=s[grepl("PANEL 1",s$name),]
# print(length(s$name))
#
# for (samp in s$name) {
# if(runif(1)[1]<.15){
# command = paste0(
# "rsync -av \'msi:/scratch.global/lanej/flow/full/fcs/",
# gsub(" ", "\\\ ", samp, fixed = TRUE),
# "' ",
# inputDir
# )
# system(command)
# }
# }
# }
markersToCluster = c("CCR7", "CD45RA", "CD28")
outputDir = "/Volumes/Beta2/flow/novelResults/"
wsMapFileFile = "/Volumes/Beta2/flow/novelResults/fcsLOCALMAP.manual.txt"
mapper = read.delim(wsMapFileFile,
stringsAsFactors = FALSE,
sep = "\t")
targets =
list.files(inputDir,
pattern = ".fcs$",
full = FALSE)
for (samp in mapper$FCS) {
if (file.exists(paste(inputDir, samp, sep = ""))) {
wsFile = mapper[which(mapper$FCS == samp),]$WSP
print(wsFile)
ws <- openWorkspace(wsFile)
print(samp)
#read the fcs file
frame = read.FCS(paste(inputDir, samp, sep = ""))
id = samp
#load the workspace for this file, apply transforms, compensation, and gates
gs <-
parseWorkspace(
ws,
#WSP file
path = inputDir,
#FCS file
name = 1,
#sample group
subset = id[1],
#load single fcs file
isNcdf = FALSE,
#not memory mapped
compensation = compensation(keyword(frame)$`SPILL`)
)
min = -20
max = 250
k = 4
num_init = 10
max_iters = 5000
# nodesOfInterest = c("Helper Tcells-CD4+", "cytotoxic Tcells-CD8+")
nodesOfInterest = c("Helper Tcells-CD4+")
markersToCluster = c("CCR7", "CD45RA", "CD28")
addComp = TRUE
# for (node in nodesOfInterest) {
print(samp)
fcsFile = samp
outputDir = "/Volumes/Beta2/flow/novelResults/"
gateKmeansWsp(
gs = gs,
fcsFile = samp,
outputDir = "/Volumes/Beta2/flow/novelResults/",
addComp = TRUE ,
num_init = 10,
max_iters = 5000
)
}
}
## [1] "/Volumes/Beta2/flow/wsp//2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs_panel1Rename.wsp"
## [1] "2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs
## Compensating
## gating ...
## done!
## [1] "2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## [1] "running phenograph for sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Running PhenoGraph...
## Run Rphenograph starts:
## -Input data of 5000 rows and 13 columns
## -k is set to 30
## Finding nearest neighbors...DONE ~ 0.585 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.764 s
## Build undirected graph from the weighted links...DONE ~ 0.361 s
## Run louvain clustering on the graph ...DONE ~ 0.386 s
## Run Rphenograph DONE, took a total of 3.09599999999999s.
## Return a community class
## -Modularity value: 0.7404781
## -Number of clusters: 8 DONE!
## [1] "running tsne for sample 2016-10-31_PANEL 1_DHS_Group two_F1637276_038.fcs"
## Running t-SNE...with seed 42 DONE
## Running DensVM...Testing kernel bandwidth for 20 points in the range min= 0.6962965 to max= 6.962965
## This will take a while...
## Computing number of peaks for kernel bandwidth = 0.6962965
## Computing number of peaks for kernel bandwidth = 1.026121
## Computing number of peaks for kernel bandwidth = 1.355946
## Computing number of peaks for kernel bandwidth = 1.685771
## Computing number of peaks for kernel bandwidth = 2.015595
## Computing number of peaks for kernel bandwidth = 2.34542
## Computing number of peaks for kernel bandwidth = 2.675245
## Computing number of peaks for kernel bandwidth = 3.005069
## Computing number of peaks for kernel bandwidth = 3.334894
## Computing number of peaks for kernel bandwidth = 3.664718
## Computing number of peaks for kernel bandwidth = 3.994543
## Computing number of peaks for kernel bandwidth = 4.324368
## Computing number of peaks for kernel bandwidth = 4.654192
## Computing number of peaks for kernel bandwidth = 4.984017
## Computing number of peaks for kernel bandwidth = 5.313842
## Computing number of peaks for kernel bandwidth = 5.643666
## Computing number of peaks for kernel bandwidth = 5.973491
## Computing number of peaks for kernel bandwidth = 6.303316
## Computing number of peaks for kernel bandwidth = 6.63314
## Computing number of peaks for kernel bandwidth = 6.962965
## DONE!



## [1] "/Volumes/Beta2/flow/wsp//2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs_panel1Rename.wsp"
## [1] "2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## [1] "running phenograph for sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Running PhenoGraph...
## Run Rphenograph starts:
## -Input data of 5000 rows and 13 columns
## -k is set to 30
## Finding nearest neighbors...DONE ~ 0.437 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.84 s
## Build undirected graph from the weighted links...DONE ~ 0.362 s
## Run louvain clustering on the graph ...DONE ~ 0.321 s
## Run Rphenograph DONE, took a total of 2.96000000000015s.

## Return a community class
## -Modularity value: 0.7555513
## -Number of clusters: 8 DONE!
## [1] "running tsne for sample 2017-02-01_PANEL 1_ZF_group one_F1652202_024.fcs"
## Running t-SNE...with seed 42 DONE
## Running DensVM...Testing kernel bandwidth for 20 points in the range min= 0.7959007 to max= 7.959007
## This will take a while...
## Computing number of peaks for kernel bandwidth = 0.7959007
## Computing number of peaks for kernel bandwidth = 1.172906
## Computing number of peaks for kernel bandwidth = 1.549912
## Computing number of peaks for kernel bandwidth = 1.926917
## Computing number of peaks for kernel bandwidth = 2.303923
## Computing number of peaks for kernel bandwidth = 2.680929
## Computing number of peaks for kernel bandwidth = 3.057934
## Computing number of peaks for kernel bandwidth = 3.43494
## Computing number of peaks for kernel bandwidth = 3.811945
## Computing number of peaks for kernel bandwidth = 4.188951
## Computing number of peaks for kernel bandwidth = 4.565957
## Computing number of peaks for kernel bandwidth = 4.942962
## Computing number of peaks for kernel bandwidth = 5.319968
## Computing number of peaks for kernel bandwidth = 5.696973
## Computing number of peaks for kernel bandwidth = 6.073979
## Computing number of peaks for kernel bandwidth = 6.450985
## Computing number of peaks for kernel bandwidth = 6.82799
## Computing number of peaks for kernel bandwidth = 7.204996
## Computing number of peaks for kernel bandwidth = 7.582001
## Computing number of peaks for kernel bandwidth = 7.959007
## DONE!



## [1] "/Volumes/Beta2/flow/wsp//2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs_panel1Rename.wsp"
## [1] "2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## [1] "running phenograph for sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Running PhenoGraph...
## Run Rphenograph starts:
## -Input data of 5000 rows and 13 columns
## -k is set to 30
## Finding nearest neighbors...DONE ~ 0.547 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.638 s
## Build undirected graph from the weighted links...DONE ~ 0.362 s
## Run louvain clustering on the graph ...DONE ~ 0.338 s
## Run Rphenograph DONE, took a total of 2.88499999999999s.

## Return a community class
## -Modularity value: 0.7106201
## -Number of clusters: 12 DONE!
## [1] "running tsne for sample 2017-02-24_PANEL 1_RR_group one_F1635691_020.fcs"
## Running t-SNE...with seed 42 DONE
## Running DensVM...Testing kernel bandwidth for 20 points in the range min= 0.849329 to max= 8.49329
## This will take a while...
## Computing number of peaks for kernel bandwidth = 0.849329
## Computing number of peaks for kernel bandwidth = 1.251643
## Computing number of peaks for kernel bandwidth = 1.653956
## Computing number of peaks for kernel bandwidth = 2.05627
## Computing number of peaks for kernel bandwidth = 2.458584
## Computing number of peaks for kernel bandwidth = 2.860898
## Computing number of peaks for kernel bandwidth = 3.263211
## Computing number of peaks for kernel bandwidth = 3.665525
## Computing number of peaks for kernel bandwidth = 4.067839
## Computing number of peaks for kernel bandwidth = 4.470153
## Computing number of peaks for kernel bandwidth = 4.872466
## Computing number of peaks for kernel bandwidth = 5.27478
## Computing number of peaks for kernel bandwidth = 5.677094
## Computing number of peaks for kernel bandwidth = 6.079408
## Computing number of peaks for kernel bandwidth = 6.481721
## Computing number of peaks for kernel bandwidth = 6.884035
## Computing number of peaks for kernel bandwidth = 7.286349
## Computing number of peaks for kernel bandwidth = 7.688663
## Computing number of peaks for kernel bandwidth = 8.090976
## Computing number of peaks for kernel bandwidth = 8.49329
## DONE!



## [1] "/Volumes/Beta2/flow/wsp//2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs_panel1Rename.wsp"
## [1] "2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Parsing 1 samples
## windows version of flowJo workspace recognized.
## version X
## Creating flowSet...
## loading data: /Volumes/Beta2/flow/testNovels//2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs
## Compensating
## gating ...
## done!
## [1] "2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Warning in dir.create(outputDir): '/Volumes/Beta2/flow/novelResults'
## already exists
## [1] "clustering sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## [1] "running phenograph for sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Running PhenoGraph...
## Run Rphenograph starts:
## -Input data of 5000 rows and 13 columns
## -k is set to 30
## Finding nearest neighbors...DONE ~ 0.439 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.741 s
## Build undirected graph from the weighted links...DONE ~ 0.358 s
## Run louvain clustering on the graph ...DONE ~ 0.354 s
## Run Rphenograph DONE, took a total of 2.89199999999983s.

## Return a community class
## -Modularity value: 0.7553768
## -Number of clusters: 10 DONE!
## [1] "running tsne for sample 2017-02-24_PANEL 1_RR_group one_F1652401_015.fcs"
## Running t-SNE...with seed 42 DONE
## Running DensVM...Testing kernel bandwidth for 20 points in the range min= 0.772674 to max= 7.72674
## This will take a while...
## Computing number of peaks for kernel bandwidth = 0.772674
## Computing number of peaks for kernel bandwidth = 1.138677
## Computing number of peaks for kernel bandwidth = 1.504681
## Computing number of peaks for kernel bandwidth = 1.870684
## Computing number of peaks for kernel bandwidth = 2.236688
## Computing number of peaks for kernel bandwidth = 2.602691
## Computing number of peaks for kernel bandwidth = 2.968695
## Computing number of peaks for kernel bandwidth = 3.334698
## Computing number of peaks for kernel bandwidth = 3.700702
## Computing number of peaks for kernel bandwidth = 4.066705
## Computing number of peaks for kernel bandwidth = 4.432709
## Computing number of peaks for kernel bandwidth = 4.798712
## Computing number of peaks for kernel bandwidth = 5.164716
## Computing number of peaks for kernel bandwidth = 5.530719
## Computing number of peaks for kernel bandwidth = 5.896723
## Computing number of peaks for kernel bandwidth = 6.262726
## Computing number of peaks for kernel bandwidth = 6.62873
## Computing number of peaks for kernel bandwidth = 6.994733
## Computing number of peaks for kernel bandwidth = 7.360736
## Computing number of peaks for kernel bandwidth = 7.72674
## DONE!



